# coding:utf8
'''
全国企业信用信息公示系统（四川）—
Crawler for the National Enterprise Credit Information Publicity System (Sichuan).
Maintainer: 肖迪 (Xiao Di)
'''
import time, urllib, StringIO
import urllib2
import sys, os
import base64, re
import json
from bs4 import BeautifulSoup
from utils import kill_captcha
import requests

reload(sys)  # Python 2 trick: re-import sys so setdefaultencoding is exposed again
path = sys.path[0]  # directory of this script; used below for debug HTML dumps
sys.setdefaultencoding("utf8")  # make implicit str<->unicode conversions use UTF-8 (Python 2 only)
from scpy.xawesome_time import parse_time


def time_clean(time):  # time-string cleaning helper
    """Normalize a raw date/time string via parse_time; return '' when unparseable."""
    # Call parse_time once instead of twice (the original parsed the same string twice).
    parsed = parse_time(time)
    return parsed if parsed else ''


def html_clean(text):
    """Strip HTML tags and all whitespace from *text*, returning a plain string."""
    plain = re.sub('<[\s\S]+?>', '', str(text))
    for ch in ('\n', ' ', '\t', '\r'):
        plain = plain.replace(ch, '')
    return plain


def string_set(text):
    """Remove every tab, newline, carriage-return and space character from *text*."""
    return re.sub('[\t\n\r ]', '', text)


def time_time(time):  # date-string formatting helper
    """Convert a date like u'2014年5月6日' to '2014-05-06 00:00:00'; '' when empty."""
    # Drop all whitespace first; an all-whitespace input counts as empty.
    cleaned = re.sub('[\t\n\r ]', '', time)
    if not cleaned:
        return ''
    # Zero-pad each numeric component to two digits, then join with dashes.
    parts = [piece if len(piece) >= 2 else "0" + piece
             for piece in re.findall("\d+", cleaned)]
    return '-'.join(parts) + " 00:00:00"


def get_value(regular, text):
    """Return the first capture of regex *regular* in *text*, or '' when absent.

    The original swallowed every exception with a bare ``except``; the only
    expected failure is "no match" (IndexError on the empty findall list),
    so catch exactly that and let real errors surface.
    """
    try:
        return re.findall(regular, text)[0]
    except IndexError:
        return ''


def get_image(image):  # crack the image captcha
    """Send raw captcha image bytes to the kill_captcha service and return the answer."""
    # 'sc' identifies the Sichuan site, 'jpeg' the image format.
    solved = kill_captcha(image, 'sc', 'jpeg')
    return solved


def reclean(text, html):
    """Return the first match of regex *text* in *html* with '>' stripped, or ''.

    The original crashed with IndexError when the pattern was absent — the
    intended '' fallback had been left commented out.  Restore it, catching
    only the no-match case.
    """
    try:
        return re.findall(text, html)[0].replace('>', '')
    except IndexError:
        return ''

def yearReport(pripid, l, s):
    # Fetch every published annual report for one enterprise and hand-assemble
    # them into a JSON-array string.
    #   pripid: site-internal enterprise id
    #   l:      millisecond timestamp reused as the site's anti-cache 'random' token
    #   s:      requests.Session that already passed the captcha check
    head = {"Host": "gsxt.scaic.gov.cn", "User-Agent": "Googlebot/2.1 (+http://www.googlebot.com/bot.html)"}
    data_3 = {'method': 'qygsInfo', 'maent.pripid': '%s' % pripid, 'czmk': 'czmk8', 'random': '%s' % l}
    # html_yearReportList = curl('http://gsxt.scaic.gov.cn/ztxy.do', data_3).decode('gbk')
    html_yearReportList = s.post('http://gsxt.scaic.gov.cn/ztxy.do', headers=head, data=data_3).content.decode('gbk')

    # Available report years appear as onclick="doNdbg('YYYY')" handlers.
    report_year = re.findall('''onclick="doNdbg\('(.+?)'\)''', html_yearReportList)
    # print year
    # NOTE(review): re.findall returns a list, never '' — this guard is dead code;
    # an empty list simply skips the loop and '[]' is returned at the bottom.
    if report_year == '':
        return ''
    yearReportList = ''
    for i in report_year:
        year = i  # save the year before inner loops reuse the variable i
        data_2 = {"method": "ndbgDetail", "maent.pripid": "%s" % pripid, "maent.nd": i, "random": "%s" % l}
        # html_2014 = curl('http://gsxt.scaic.gov.cn/ztxy.do', data_2).decode('gbk')
        html_2014 = s.post('http://gsxt.scaic.gov.cn/ztxy.do', headers=head, data=data_2).content.decode('gbk')

        soup = BeautifulSoup(html_2014, "html.parser")
        # Whitespace-stripped copy so the compact literal regexes below can match.
        html = html_2014.replace('\r', '').replace('\n', '').replace(' ', '').replace('\t', '')
        open(path + '/a.html', 'w').write(html)  # debug dump; overwritten every year
        # --- basic fields of the report form ---
        regNo = reclean(u'<thwidth="20%.+?>注册号</th><tdwidth="30%">(.+?)</td>', html)
        phone = reclean(u'<thwidth="20%.+?>企业联系电话</th><tdwidth="30%"(.+?)</td>', html)
        email = reclean(u'<thwidth="20%.+?>电子邮箱</th><tdwidth="30%"(.+?)</td>', html)
        zipcode = reclean(u'<thwidth="20%.+?>邮政编码</th><tdwidth="30%"(.+?)</td>', html)
        enterpriseStatus = reclean(u'<thwidth="20%.+?>企业经营状态</th><tdwidth="30%"(.+?)</td>', html)
        haveWebsite = reclean(u'<thwidth="20%.+?>是否有网站或网店</th><tdwidth="30%"(.+?)</td>', html)
        buyEquity = reclean(u'<thwidth="20%.+?>企业是否有投资信息或购买其他公司股权</th><tdwidth="30%"(.+?)</td>', html)
        equityTransfer = reclean(u'<thwidth="20%.+?>有限责任公司本年度是否发生股东股权转让</th><tdwidth="30%"(.+?)</td>', html)
        address = reclean(u'<thwidth="20%.+?>企业通信地址</th><t.+?>(.+?)</td>', html)
        employeeCount = reclean(u'<thwidth="20%.+?>从业人数</th><tdwidth="30%">(.+?)</td>', html)
        baseInfo = '{"regNo":"%s","phone":"%s","email":"%s","zipcode":"%s","enterpriseStatus":"%s","haveWebsite":"%s","buyEquity":"%s","equityTransfer":"%s","address":"%s","employeeCount":"%s"}' % (
            regNo, phone, email, zipcode, enterpriseStatus, haveWebsite, buyEquity, equityTransfer, address,
            employeeCount)
        # --- website / web-shop info ---
        if u'网站或网店信息' in html:
            web = re.findall('<trid="tr_wzxx_1"name="wzxx">(.+?)</tr>', html)
            if bool(web) == False:
                website = '{"type":"","name":"","link":""}'
            else:
                # NOTE(review): only the last row survives this loop — earlier
                # website rows are overwritten.
                for i in range(len(web)):
                    web1 = re.findall('<td>(.+?)</td>', web[i])
                    website = '{"type":"%s","name":"%s","link":"%s"}' % (web1[0], web1[1], web1[2])
        else:
            website = '{"type":"","name":"","link":""}'
        # print website
        # --- investor / shareholder contribution rows ---
        investnum = re.findall('<tr id="tr_tzrxx_\d+" name="tzrxx">([\s\S]+?)</tr>', html_2014)
        num = len(investnum)
        str2 = ''
        for i in range(num):
            investnum1 = investnum[i].replace('\t', '').replace('\n', '').replace('\r', '').replace(' ', '')
            # print investnum
            # shareholderName1=re.findall('<tdstyle="text-align:left;">(.+?)</td>',investnum[i])
            shareholderName = get_value('<divstyle="width:100px;">(.+?)</div>', investnum1)
            subConam = get_value(u'<listyle="text-align:left;">(.+?)&nbsp;</li>', investnum1)
            try:
                subConDate = time_time(re.findall(u'<listyle="text-align:center;">(\d+年\d+月\d+日)</li>', investnum1)[0])
            except:
                subConDate = ''
            try:
                subConType = re.findall(u'<lititle=".+?"style="text-align:left;">(.+?)</li>', investnum1)[0]
            except:
                subConType = ''
            try:
                paidType = re.findall(u'<lititle=".+?"style="text-align:left;">(.+?)</li>', investnum1)[1]
            except:
                paidType = ''

            paidConMoney = get_value(u'<listyle="text-align:left;">(.+?)&nbsp;</li>', investnum1)
            try:
                paidTime = time_time(re.findall(u'<listyle="text-align:center;">(\d+年\d+月\d+日)</li>', investnum1)[1])
            except:
                paidTime = ''

            # ID=re.findall('''<ahref="###"onclick="showRyxx\('(.+?)','(.+?)'\)"''',investnum[i])[0]
            # data_data={'method':'tzrCzxxDetial','maent.xh':'%s'%ID[0],'maent.pripid':'%s'%ID[1],"random":"%s"%l}
            # html_html=curl('http://gsxt.scaic.gov.cn/ztxy.do',data_data)
            # open(path+'/html_html.html','w').write(html_html)
            # regCapCur=get_value('<li style="text-align: left;vertical-align: middle;">&nbsp;(.+?)</li>',html_html).decode('gbk')
            # print regCapCur
            # subConam=get_value('<li style="text-align: right;vertical-align: middle;">(.+?)&nbsp;</li>',html_html)
            # print subConam
            # conDate=get_value('<li>(\d.+?)</li>',html_html)

            detail = '{"shareholderName":"%s","subConam":"%s","subConDate":"%s","subConType":"%s","paidConMoney":"%s","paidTime":"%s","paidType":"%s"}' % (
                shareholderName, subConam, subConDate, subConType, paidConMoney, paidTime, paidType)
            str2 = detail + ',' + str2
        investorInformations = '[%s]' % str2.rstrip('\n').rstrip(',')
        # print soup
        # --- assets table: the 4th table with class "detailsList" ---
        assetsInfotest = soup.findAll('table', {"class": "detailsList"})[3]
        assetsInfo1 = assetsInfotest.findAll('td')
        # print assetsInfo1
        generalAssets = assetsInfo1[0].string.replace(' ', '').replace('\t', '').replace('\n', '').replace('\r', '')
        # print generalAssets
        ownersEequity = assetsInfo1[1].string.replace(' ', '').replace('\t', '').replace('\n', '').replace('\r', '')
        # print ownersEequity
        revenue = assetsInfo1[2].string.replace(' ', '').replace('\t', '').replace('\n', '').replace('\r', '')
        # print revenue
        profit = assetsInfo1[3].string.replace(' ', '').replace('\t', '').replace('\n', '').replace('\r', '')
        mainRevenue = assetsInfo1[4].string.replace(' ', '').replace('\t', '').replace('\n', '').replace('\r', '')
        netProfit = assetsInfo1[5].string.replace(' ', '').replace('\t', '').replace('\n', '').replace('\r', '')
        taxPayment = assetsInfo1[6].string.replace(' ', '').replace('\t', '').replace('\n', '').replace('\r', '')
        liability = assetsInfo1[7].string.replace(' ', '').replace('\t', '').replace('\n', '').replace('\r', '')
        # NOTE(review): the format args below pass taxPayment twice, so the
        # "liability" field receives the tax figure and the computed liability
        # value is never used — almost certainly a bug to confirm upstream.
        assets_Info = '{"generalAssets":"%s", "ownersEequity":"%s", "revenue":"%s","profit":"%s","mainRevenue":"%s","netProfit":"%s","taxPayment":"%s","liability":"%s"}' % (
        generalAssets, ownersEequity, revenue, profit, mainRevenue, netProfit, taxPayment, taxPayment)
        assets_Info = str(assets_Info).replace('\r', '')
        # print assets_Info
        # time.sleep(60)
        # --- change records ("bgxx") rows ---
        strr2 = ''
        for i in soup.findAll('tr', {"name": "bgxx"}):
            jj = i.findAll('td')
            if u'收起更多' in html_clean(jj[3]):
                afterChange1 = html_clean(jj[3]).split(u'更多')
                afterChange = afterChange1[1]
            else:
                afterChange = html_clean(jj[3])
            strr1 = {"changedItem": html_clean(jj[1]), "beforeChange": html_clean(jj[2]), "afterChange": afterChange,
                     "time": time_clean(html_clean(jj[4]))}
            strr1 = str(strr1)
            # print strr1
            strr2 = strr1 + ',' + strr2
        changeRecords = '[%s]' % strr2.rstrip(',')
        # --- equity change ("gqbg") rows ---
        gqbg = re.findall('<trname="gqbg"id="tr_gqbg_\d">(.+?)</tr>', html)
        str_str1 = ''
        for i in gqbg:
            shareholderName = re.findall('<tdstyle="text-align:left;">(.*?)</td>', i)[0]
            equityBefore = re.findall('<tdstyle="text-align:left;">(.*?)</td>', i)[1]
            equityAfter = re.findall('<tdstyle="text-align:left;">(.*?)</td>', i)[2]
            # NOTE(review): this local 'time' shadows the imported time module
            # for the rest of the loop body (no module use follows, so harmless).
            time = time_time(re.findall('<tdstyle="text-align:center;">(.*?)</td>', i)[0])
            str2 = '{"shareholderName":"%s","equityAfter":"%s","equityBefore":"%s","time":"%s"}' % (
            shareholderName, equityAfter, equityBefore, time)
            str_str1 = str2 + ',' + str_str1
        try:
            equityChangeInformations = "[%s]" % str_str1.rstrip(',')
        except:
            equityChangeInformations = "[]"
        # --- outbound investment ("tzxx") rows ---
        entinv = re.findall(u'对外投资信息</th></tr>([\s\S]+?)</table>', html)[0]
        enlist = re.findall('<trname="tzxx"id="tr_tzxx_\d">[\s\S]+?</tr>', entinv)
        enlist_str = ''
        for i in enlist:
            print i
            result = re.findall('<tdstyle="text-align:left;">(.+?)</td>', i)
            entName = result[0]
            regNo = result[1]
            str1 = '{"entName":"%s","entType":"","fundedRatio":"","currency":"","entStatus":"","canDate":"","esDate":"","regOrg":"","regCapcur":"","regCap":"","revDate":"","name":"","subConam":"","regNo":"%s"}' % (
            entName, regNo)
            enlist_str = str1 + ',' + enlist_str
        entinvItemList = '[%s]' % enlist_str.rstrip(',')
        print entinvItemList
        # Assemble this year's object and prepend it to the accumulated list.
        yearReportListstr1 = '{"year":"%s","entinvItemList":%s,"baseInfo":%s,"website":%s,"investorInformations":%s,"assetsInfo":%s,"equityChangeInformations":%s,"changeRecords":%s}' % (
            year, entinvItemList, baseInfo, website, investorInformations, assets_Info, equityChangeInformations,
            changeRecords)
        yearReportList = yearReportListstr1 + ',' + yearReportList

    over = '[%s]' % yearReportList.rstrip(',')
    return over


def run1(word, args='1'):
    # Crawl one company record from the Sichuan GSXT site.
    #   word: company name as a utf-8 encoded str
    #   args: '1' -> return a python dict (via eval), '0' -> return the raw JSON text
    word = word.decode('utf8').encode('gbk')  # the site expects GBK form data
    l = int(time.time() * 1000)  # ms timestamp doubles as the site's 'random' token
    head = {"Host": "gsxt.scaic.gov.cn", "User-Agent": "Googlebot/2.1 (+http://www.googlebot.com/bot.html)"}
    # curl('http://gsxt.scaic.gov.cn/ztxy.do?method=list&djjg=&random=%s' % l, cookie=1)
    s = requests.Session()
    # s.get('http://gsxt.scaic.gov.cn/ztxy.do?method=list&djjg=&random=%s'%l,headers=head)
    # Captcha loop: fetch image, crack it, submit the search until flag is empty.
    while 1:
        try:
            # if 1:
            print 2
            # image = curl('http://gsxt.scaic.gov.cn/ztxy.do?method=createYzm&dt=%s&random=%s' % (l, l))
            image = s.get('http://gsxt.scaic.gov.cn/ztxy.do?method=createYzm&dt=%s&random=%s' % (l, l),
                          headers=head).content
            # open('/Users/xiaodi/Desktop/123.jpeg','w').write(image)
            yzm = get_image(image)  # cracked captcha answer
        except Exception, e:
            print e
            time.sleep(2)
            print '图片验证码出现错误'
            continue

        print '-' * 100
        print yzm
        data = {'currentPageNo': '1', 'yzm': '%s' % yzm, 'cxym': 'cxlist', 'maent.entname': '%s' % word}
        # print yzm
        try:
            # html = curl('http://gsxt.scaic.gov.cn/ztxy.do?method=list&djjg=&random=%s' % l, data)
            html = s.post('http://gsxt.scaic.gov.cn/ztxy.do?method=list&djjg=&random=%s' % l, headers=head,
                          data=data).content
            # open('/Users/xiaodi/Desktop/123.html','w').write(html)
        except:
            print '图片网站出现500，正在重新访问'
            time.sleep(2)
            continue

        # Empty flag means the captcha was accepted.
        # NOTE(review): result[0] raises IndexError when the page layout changes
        # and the flag variable is missing — unguarded.
        result = re.findall("var flag = '(.*?)'", html)

        print result
        if not result[0]:
            break
        else:
            time.sleep(2)
            print '图片验证码出现错误'
    # Pull the openView(...) arguments of the first search hit.
    try:
        do_data = re.findall('<a href="javascript:void\(0\);" onclick="openView\((.+?)\)"', html)[0]
        print do_data
    except Exception, e:
        print e
        return ''

    sdata = do_data.split(',')  # [pripid, entbigtype] with surrounding quotes
    invdata = {'method': 'qyInfo', 'djjg': '', 'maent.pripid': '%s' % sdata[0].replace("'", ""),
               'maent.entbigtype': '%s' % sdata[1].replace("'", ""), 'random': '%s' % l}
    # html1 = curl('http://gsxt.scaic.gov.cn/ztxy.do', invdata)
    html1 = s.post('http://gsxt.scaic.gov.cn/ztxy.do', headers=head, data=invdata).content
    # open(path+'/image1.html','w').write(html1)
    # text: fully whitespace-stripped; text1: keeps spaces (for patterns that need them).
    text = html1.replace('\r', '').replace('\n', '').replace('\t', '').replace(' ', '').decode('gbk')
    text1 = html1.replace('\r', '').replace('\n', '').replace('\t', '').decode('gbk')
    # open(path+'/image.txt','w').write(text)

    # --- basic registration fields ---
    enterpriseName = get_value(u'<th>名称</th><tdwidth="30%">(.+?)</td>', text)
    frName = get_value(u'<th width="20%">法定代表人</th>.*?<td>(.+?)</td>', text1)
    regNo = get_value(u'<thwidth="20%">注册号</th><tdwidth="30%">(.+?)</td>', text)
    if u'注册资本' in text:
        regCap_1 = get_value(u'<th>注册资本</th><td>(.+?)</td>', text).replace('&nbsp;', '')
        regCap = re.findall('\d+', regCap_1)[0]
        regCapCur_1 = get_value(u'<th>注册资本</th><td>(.+?)</td>', text).split('&nbsp;')[1].replace('万', '')
        re_h = re.compile(u'^元')
        regCapCur = re_h.sub('', regCapCur_1)
    else:
        regCap = ''
        regCapCur = ''
    esDate = time_time(get_value(u'<thwidth="20%">成立日期</th><td>(\d.+?)</td>', text))
    openFrom = time_time(get_value(u'<th>营业期限自</th><td>(\d.+?)</td>', text))
    openTo = time_time(get_value(u'<th>营业期限至</th><td>(\d.+?)</td>', text))
    enterpriseType = get_value(u'<th>类型</th><td>(.+?)</td>', text)
    enterpriseStatus = get_value(u'<th>登记状态</th>(.+?)</td>', text.replace('<tdcolspan="1">', '').replace('<td>', ''))
    cancelDate = ""
    revokeDate = time_time(get_value(u"<th>吊销日期</th><td>(\d.+?)</td>", text))
    address = get_value(u'<th>住所</th><tdcolspan="3">(.+?)</td>', text)
    abultem = ""
    cbultem = ""
    operateScope = get_value(u'<th>经营范围</th><tdcolspan="3">(.+?)</td>', text.replace('<br/>', '').replace('<br>', ''))
    operateScopeAndForm = ""
    regOrg = get_value(u'<th>登记机关</th><td>(.+?)</td>', text)
    ancheYear = time_time(get_value(u'<th>核准日期</th><td>(.+?)</td>', text))
    ancheDate = time_time(get_value(u'<th>核准日期</th><td>(.+?)</td>', text))
    industryPhyName = ""
    industryCode = ""
    str1 = ''' "basicList":[{
	        "enterpriseName" :"%s",
			"frName" : "%s",
	        "regNo" : "%s",
	        "regCap" : "%s",
	        "regCapCur" : "%s",
	        "esDate" : "%s",
	        "openFrom" : "%s",
	        "openTo" : "%s",
	        "enterpriseType" : "%s",
	        "enterpriseStatus" : "%s",
	        "cancelDate": "%s",
	        "revokeDate" : "%s",
	        "address" : "%s",
	        "abultem":"%s",
	        "cbultem":"%s",
	        "operateScope":"%s",
	        "operateScopeAndForm":"%s",
	        "regOrg":"%s",
	        "ancheYear":"%s",
	        "ancheDate":"%s",
	        "industryPhyName":"%s",
	        "industryCode":"",
	        "industryName":"",
	        "recCap":"",
	        "oriRegNo":""
	    }]''' % (
        enterpriseName, frName, regNo, regCap, regCapCur, esDate, openFrom, openTo, enterpriseType, enterpriseStatus,
        cancelDate, revokeDate, address, abultem, cbultem, operateScope, operateScopeAndForm, regOrg, ancheYear,
        ancheDate,
        industryPhyName)
    # print str1

    # print investnum


    str2 = ''
    # NOTE(review): if the findall below fails, pripid stays unbound and the
    # print on the next line raises NameError — only 'print text' runs first.
    try:
        pripid = re.findall("sfgsInfo&maent.pripid=(.+?)&", text)[0]
    except:
        print text
    print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' + pripid
    data_2 = {"method": "ndbgDetail", "maent.pripid": "%s" % pripid, "maent.nd": "2014", "random": "%s" % l}
    # html_2014 = curl('http://gsxt.scaic.gov.cn/ztxy.do', data_2).decode('gbk')
    # investnum = re.findall('<tr id="tr_tzrxx_\d+" name="tzrxx">([\s\S]+?)</tr>', html_2014)
    # #print investnum
    # num = len(investnum)
    # #print num
    # for i in range(num):
    #     investnum1 = investnum[i].replace('\t', '').replace('\n', '').replace('\r', '').replace(' ', '')
    #     #print investnum
    #     #shareholderName1=re.findall('<tdstyle="text-align:left;">(.+?)</td>',investnum[i])
    #     shareholderName = get_value('<divstyle="width:100px;">(.+?)</div>', investnum1)
    #     subConam = get_value(u'<listyle="text-align:left;">(.+?)&nbsp;</li>', investnum1)
    #     regCapCur = get_value(u'<lititle="货币"style="text-align:left;">(.+?)</li>', investnum1)
    #     conDate = time_time(get_value(u'<listyle="text-align:center;">(\d+年\d+月\d+日)</li>', investnum1))

    #     #ID=re.findall('''<ahref="###"onclick="showRyxx\('(.+?)','(.+?)'\)"''',investnum[i])[0]
    #     #data_data={'method':'tzrCzxxDetial','maent.xh':'%s'%ID[0],'maent.pripid':'%s'%ID[1],"random":"%s"%l}
    #     #html_html=curl('http://gsxt.scaic.gov.cn/ztxy.do',data_data)
    #     #open(path+'/html_html.html','w').write(html_html)
    #     #regCapCur=get_value('<li style="text-align: left;vertical-align: middle;">&nbsp;(.+?)</li>',html_html).decode('gbk')
    #     #print regCapCur
    #     #subConam=get_value('<li style="text-align: right;vertical-align: middle;">(.+?)&nbsp;</li>',html_html)
    #     #print subConam
    #     #conDate=get_value('<li>(\d.+?)</li>',html_html)

    #     detail = '{"shareholderName":"%s","subConam":"%s","regCapCur":"%s","conDate":"%s","fundedRatio":"","country":""}' % (
    #     shareholderName, subConam, regCapCur, conDate)
    #     str2 = detail + ',\n' + str2
    # str2 = '"shareholderList":[%s]' % str2.rstrip('\n').rstrip(',')
    # if str2 == '"shareholderList":[]':
    #     str2 = ''
    #     detail2 = re.findall('<tdstyle="text-align:left;">(.+?)/td>', text)
    #     detail2 = [i.replace('<', '') for i in detail2]
    #     for i in range(0, len(detail2), 5):
    #         try:
    #             ID = re.findall('''ahref="###"onclick="showRyxx\('(.+)','(.+)'\)"''', detail2[i + 4])[0]
    #         #print '~~~~~~~~~~~~'
    #         #print ID
    #         except:
    #             ID = ('0', '1')
    #         shareholderName = detail2[i + 1]

    #         try:
    #             data_data = {'method': 'tzrCzxxDetial', 'maent.xh': '%s' % ID[0], 'maent.pripid': '%s' % ID[1],
    #                          "random": "%s" % l}
    #             html_html = curl('http://gsxt.scaic.gov.cn/ztxy.do', data_data)
    #             regCapCur = get_value('<li style="text-align: left;vertical-align: middle;">&nbsp;(.+?)</li>',
    #                                   html_html).decode('gbk')
    #             conDate = get_value('<li>(\d.+?)</li>', html_html)
    #             subConam = get_value('<li style="text-align: right;vertical-align: middle;">(.+?)&nbsp;</li>',
    #                                  html_html)
    #         except:
    #             regCapCur = ''
    #             subConam = ''
    #             conDate = ''
    #         detail = '{"shareholderName":"%s","subConam":"%s","regCapCur":"%s","conDate":"%s","fundedRatio":"","country":""}' % (
    #         shareholderName, subConam, regCapCur, conDate)
    #         str2 = detail + ',\n' + str2
    #     str2 = '"shareholderList":[%s]' % str2.rstrip('\n').rstrip(',')
    # --- filing info page (personList, branches, liquidation) ---
    data_data = {'method': 'baInfo', 'maent.pripid': '%s' % pripid, 'czmk': 'czmk2', 'random': '%s' % l}
    # html_2 = curl("http://gsxt.scaic.gov.cn/ztxy.do", data_data)
    html_2 = s.post('http://gsxt.scaic.gov.cn/ztxy.do', headers=head, data=data_data).content
    list_list = re.findall('<td align="center" style="width:20%;text-align:left;">([\s\S]+?)/td>', html_2)
    str3 = ''
    for j in range(0, len(list_list), 2):
        k = 0  # NOTE(review): unused local
        str3_1 = '{"name":"%s","position":"%s","sex":""}' % (
            list_list[j].replace('<', '').decode('gbk'), list_list[j + 1].replace('<', '').decode('gbk'))
        str3 = str3_1 + ',\n' + str3
    str3 = '"personList":[%s]' % str3.rstrip('\n').rstrip(',')
    str4 = '"punishBreakList":[]'  # dishonest judgment-debtor records
    str5 = '"punishedList":[]'  # judgment-debtor records
    str6 = '"alidebtList":[]'  # Alibaba loan-default records
    # str7 = ''  # enterprise outbound-investment info
    # if u'投资设立企业或购买股权企业名称' in html_2014:
    #     detail7 = \
    #     re.findall(u'<th width="50%" style="text-align:center;">投资设立企业或购买股权企业名称</th>([\s\S]+?)</table>', html_2014)[0]
    #     detail7_1 = re.findall('<td style="text-align:left;">(.+?)</td>', detail7)
    #     for i in range(0, len(detail7_1), 2):
    #         str7_1 = '{"entName":"%s","regNo":"%s","entType":"","regCap":"","regCapcur":"","canDate":"","revDate":"","entStatus":"","regOrg":"","subConam":"","currency":"","fundedRatio":"","esDate":"","name":""}' % (
    #         detail7_1[i], detail7_1[i + 1])
    #         str7 = str7_1 + ',\n' + str7
    #     str7 = '"entinvltemList":[%s]' % str7.rstrip('\n').rstrip(',')
    # else:
    #     str7 = '"entinvltemList":[%s]' % str7.rstrip('\n').rstrip(',')
    str8 = '"frinvList":[]'  # legal representative's outbound investments
    str9 = '"frPositionList":[]'  # legal representative's posts at other enterprises
    # --- change/alteration ("bg") records from the main page ---
    str10 = ""
    detail10 = re.findall('<tr width="95%" id="tr_bg_\d+" name="bg">([\s\S]+?)</tr>', html1)
    for i in range(len(detail10)):
        detail101 = detail10[i].replace('\r', '').replace('\n', '').replace('\t', '').decode('gbk')
        # print detail101
        altltem = re.findall('<td width="15%">(.+?)</td>', detail101)[0]
        altDate = time_time(re.findall('<td width="10%" style="text-align:center;">(.+?)</td>', detail101)[0])
        result = re.findall('<td width="25%">[\s\S]+?</td>', detail101)
        if '<span style="width: 100%;">' in result[0]:
            altBe = re.findall('<span style="width: 100%;">([\s\S]+?)</span>', result[0])[0]
        else:
            altBe = re.findall('<span id="beforeMore\d+_\d+" style="display:none;width:100%;">(.+?)<br/>', result[0])[0]
        if '<span style="width: 100%;">' in result[1]:
            altAf = re.findall('<span style="width: 100%;">([\s\S]+?)</span>', result[1])[0]
        else:
            altAf = re.findall('<span id="beforeMore\d+_\d+" style="display:none;width:100%;">(.+?)<br/>', result[1])[0]
        str10_1 = '{"altDate":"%s","altltem":"%s","altBe":"%s","altAf":"%s"}' % (altDate, altltem, altBe, altAf)
        str10 = str10_1 + ',\n' + str10
    str10 = '"alterList":[%s]' % str10.rstrip('\n').rstrip(',')
    # --- branch ("fr2") records ---
    str11 = ''
    detail11 = re.findall('<tr name="fr2" id="tr_fr2_\d+">([\s\S]+?)</tr>', html_2)
    for i in range(len(detail11)):
        detail_list = re.findall('<td style="text-align:left;">(.+?)</td>', detail11[i])
        str11_1 = '{"brName":"%s","brRegno":"%s","brPrincipal":"","cbultem":"","brAddr":""}' % (
            detail_list[1].decode('gbk'), detail_list[0].replace('\n', '').replace('\r', '').decode('gbk'))
        str11 = str11_1 + ',\n' + str11
    str11 = '"filiationList":[%s]' % str11.rstrip('\n').rstrip(',')
    str12 = '"caselnfoList":[]'
    str13 = '"sharesFrostList":[]'
    # --- equity pledge ("gq") records ---
    str14 = ""
    data_data = {"method": "gqczxxInfo", "maent.pripid": "%s" % pripid, "czmk": "czmk4", "random": "%s" % l}
    # html_3 = curl('http://gsxt.scaic.gov.cn/ztxy.do', data_data)
    html_3 = s.post('http://gsxt.scaic.gov.cn/ztxy.do', headers=head, data=data_data).content
    soup3 = BeautifulSoup(html_3, "html.parser")
    no = soup3.find_all(id=re.compile('tr_gq_\d+'))
    if len(no) == 0:
        str14 = ""
    else:
        for i in no:
            text = i.find_all('td')
            str14_1 = '{"impoRg":"%s","impoRgtype":"%s","impAm":"%s","imponrecDate":"","impExaeep":"","impSanDate":"%s","impTo":""}' % (
                string_set(text[5].string), string_set(text[2].string), string_set(text[4].string),
                time_time(text[7].string))
            str14 = str14_1 + ',\n' + str14
    str14 = '"shareslmpawnList":[%s]' % str14.rstrip('\n').rstrip(',')
    str15 = '"morDetailList":[]'
    str16 = '"morgualnfoList":[]'
    # --- liquidation info ---
    soup2 = BeautifulSoup(html_2, "html.parser")
    no2 = soup2.find_all('td', colspan="4")
    if len(no2) == 0:
        str17 = '"liquidationList":{}'
    else:
        liquidationList = '{"ligentity":"","ligprincipal":"%s","liqMen":"%s",liGst:"","ligEndDate":"","debtTranee":"","claimTranee":""}' % (
            string_set(no2[0].string), string_set(no2[1].string))
        # NOTE(review): duplicated assignment 'str17 = str17 =' — harmless but a typo.
        str17 = str17 = '"liquidationList":%s' % liquidationList
    # print str11
    str18 = yearReport(pripid, l, s)
    str18 = '"yearReportList":%s' % str18
    result = '{%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s}' % (
        str1, str3, str4, str5, str6, str8, str9, str10, str11, str12, str13, str14, str15, str16, str17, str18)
    # print result
    # open(path+'/result.txt','w').write(result)
    if args == '1':
        print result
        # NOTE(review): eval() on a hand-built string from scraped HTML is unsafe
        # if any field contains quotes/braces; json.loads with proper escaping
        # would be safer — confirm before changing, keys are not JSON-quoted here.
        return eval(result)
    if args == '0':
        return result


def run(word, args='1'):
    # Public entry point wrapping run1 with (currently disabled) retry logic.
    #   word: company name (utf-8 str); args: '1' -> dict, '0' -> raw JSON text
    # NOTE(review): num is never incremented — the retry except-branch below is
    # commented out — so the num == 5 bail-out is currently unreachable.
    num = 0
    while 1:
        try:
            if num == 5:
                raise ValueError, '四川工商打不开了，稍后访问'
                # return '四川工商崩溃了'
            result = run1(word, args)
            return result
        except ValueError, ex:
            print ex
            return 'over'
            # except Exception, ex:
            #    num += 1
            #    print Exception, ":", ex
            #    print '继续访问'
            #    continue


search = run  # public alias expected by the crawler framework

if __name__ == '__main__':
    print json.dumps(run(u'成都念念科技有限公司'), ensure_ascii=False, indent=4)
